Code
library(dplyr)
library(tidyr)
library(scran)
library(scater)
library(ggplot2)
library(patchwork)
library(SpatialExperiment)dir <- file.path("..", "data", "Visium")
ids <- list.dirs(dir, recursive = FALSE)
ids <- ids[grep("^B[0-9]+", basename(ids))]
names(ids) <- gsub("_[0-9]*$", "", basename(ids))
spe <- read10xVisium(ids, type = "HDF5", images = "lowres")
spe$barcode <- colnames(spe); colnames(spe) <-
paste(spe$sample_id, spe$barcode, sep = ".")
ccRCC LSCC LUAD
9564 16010 2158
csv <- file.path(dir, "metadata", "labels.csv")
lab <- read.csv(csv, row.names = 1)
lab$TLS[lab$TLS == ""] <- NA
spe$label <- factor(NA, unique(lab$TLS), exclude = NULL)
idx <- split(seq(ncol(spe)), spe$sample_id)
lab <- split(lab, lab$Patient_ID)
for (. in names(ids)) {
bcs <- gsub("-[0-9]$", "-1", lab[[.]]$Barcode)
xdi <- match(spe[, idx[[.]]]$barcode, bcs)
spe[, idx[[.]]]$label <- lab[[.]]$TLS
}
table(spe$sample_id, spe$label)
<NA> NOR TUM INFL TLS EXCL LN
B04_17776 1814 1664 585 141 161 0 0
B05_32288 1869 574 355 136 80 0 0
B05_8527 2584 330 1089 40 43 87 0
B06_24137 1494 1137 1042 62 407 0 33
B06_24784 1064 1126 718 178 138 0 0
B07_30616 1372 266 491 6 32 0 0
B07_38596 2270 493 593 27 96 977 0
B15_11190 1568 160 364 39 27 0 0
class: SpatialExperiment
dim: 17878 27732
metadata(0):
assays(1): counts
rownames(17878): SAMD11 NOC2L ... TMSB4Y KDM5D
rowData names(1): symbol
colnames(27732): B04_17776.AAACAACGAATAGTTC-1
B04_17776.AAACAAGTATCTCCCA-1 ... B15_11190.TTGTTTCATTAGTCTA-1
B15_11190.TTGTTTCCATACAACT-1
colData names(7): in_tissue array_row ... TumorType label
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
spatialCoords names(2) : x y
imgData names(4): sample_id image_id data scaleFactor
dr <- rd %>%
select(matches("subsets.*detected")) %>%
pivot_longer(everything()) %>%
mutate(sample_id = gsub("subsets_(.*)_detected", "\\1", name))
ggplot(dr, aes(value, col = sample_id)) &
labs(x = "% of cells with non-zero count") &
geom_density(key_glyph = "point") & scale_x_sqrt() &
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) &
theme_bw() & theme(aspect.ratio = 2/3,
panel.grid.minor = element_blank(),
legend.key.size = unit(0.5, "lines")) ggplot(cd, aes(total, col = sample_id)) +
ggplot(cd, aes(detected, col = sample_id)) +
ggplot(cd, aes(total, col = label)) +
ggplot(cd, aes(detected, col = label)) +
plot_layout(nrow = 2, guides = "collect") &
geom_density(key_glyph = "point") & scale_x_log10() &
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) &
theme_bw() & theme(legend.key.size = unit(0.5, "lines")) ns <- as.data.frame(table(sample_id = spe$sample_id, label = spe$label))
ggplot(ns, aes(label, Freq, fill = label)) +
facet_wrap(~ sample_id, scales = "free_y", nrow = 2) +
geom_bar(stat = "identity", position = "dodge") +
labs(y = "# spots") + theme_bw() + theme(
panel.grid.minor = element_blank(),
legend.key.size = unit(0.5, "lines"),
axis.text.x = element_text(angle = 45, hjust = 1))ggplot(ns, aes(sample_id, Freq, fill = sample_id)) +
facet_wrap(~ label, scales = "free_y", nrow = 2) +
geom_bar(stat = "identity", position = "dodge") +
labs(y = "# spots") + theme_bw() + theme(
panel.grid = element_blank(),
legend.key.size = unit(0.5, "lines"),
axis.text.x = element_text(angle = 45, hjust = 1))
FALSE TRUE
178 27554
# for each tumor type, get genes detected
# in at least 1% of cells in all samples
y <- t(as.matrix(counts(spe) > 0))
idx <- split(seq(ncol(spe)), spe$TumorType == "ccRCC")
det <- lapply(idx, \(.) {
ids <- droplevels(spe$sample_id[.])
det <- by(y[., ], ids, \(.) colnames(.)[colMeans(.) >= 0.01])
Reduce(intersect, det)
})
sapply(det, length)FALSE TRUE
9491 10364
[1] 8701
[1] 8701 27554
[1] 8701 26490
R version 4.3.0 (2023-04-21)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.2.1
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Zurich
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] SpatialExperiment_1.10.0 patchwork_1.1.2
[3] scater_1.28.0 ggplot2_3.4.2
[5] scran_1.28.1 scuttle_1.10.1
[7] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.1
[9] Biobase_2.60.0 GenomicRanges_1.52.0
[11] GenomeInfoDb_1.36.0 IRanges_2.34.0
[13] S4Vectors_0.38.1 BiocGenerics_0.46.0
[15] MatrixGenerics_1.12.0 matrixStats_0.63.0
[17] tidyr_1.3.0 dplyr_1.1.2
loaded via a namespace (and not attached):
[1] bitops_1.0-7 gridExtra_2.3
[3] rlang_1.1.1 magrittr_2.0.3
[5] compiler_4.3.0 DelayedMatrixStats_1.22.0
[7] vctrs_0.6.2 pkgconfig_2.0.3
[9] crayon_1.5.2 fastmap_1.1.1
[11] magick_2.7.4 XVector_0.40.0
[13] utf8_1.2.3 rmarkdown_2.21
[15] ggbeeswarm_0.7.2 purrr_1.0.1
[17] xfun_0.39 bluster_1.10.0
[19] zlibbioc_1.46.0 beachmat_2.16.0
[21] jsonlite_1.8.4 rhdf5filters_1.12.1
[23] DelayedArray_0.26.2 Rhdf5lib_1.22.0
[25] BiocParallel_1.33.11 irlba_2.3.5.1
[27] parallel_4.3.0 cluster_2.1.4
[29] R6_2.5.1 limma_3.56.0
[31] Rcpp_1.0.10 knitr_1.42
[33] R.utils_2.12.2 Matrix_1.5-4
[35] igraph_1.4.2 tidyselect_1.2.0
[37] rstudioapi_0.14 yaml_2.3.7
[39] viridis_0.6.3 codetools_0.2-19
[41] lattice_0.21-8 tibble_3.2.1
[43] withr_2.5.0 evaluate_0.21
[45] pillar_1.9.0 generics_0.1.3
[47] RCurl_1.98-1.12 sparseMatrixStats_1.12.0
[49] munsell_0.5.0 scales_1.2.1
[51] glue_1.6.2 metapod_1.8.0
[53] tools_4.3.0 BiocNeighbors_1.18.0
[55] ScaledMatrix_1.8.1 locfit_1.5-9.7
[57] rhdf5_2.44.0 grid_4.3.0
[59] DropletUtils_1.20.0 edgeR_3.42.2
[61] colorspace_2.1-0 GenomeInfoDbData_1.2.10
[63] beeswarm_0.4.0 BiocSingular_1.16.0
[65] HDF5Array_1.28.1 vipor_0.4.5
[67] cli_3.6.1 rsvd_1.0.5
[69] fansi_1.0.4 S4Arrays_1.0.1
[71] viridisLite_0.4.2 gtable_0.3.3
[73] R.methodsS3_1.8.2 digest_0.6.31
[75] ggrepel_0.9.3 dqrng_0.3.0
[77] rjson_0.2.21 htmlwidgets_1.6.2
[79] htmltools_0.5.5 R.oo_1.25.0
[81] lifecycle_1.0.3 statmod_1.5.0